*************************************************
*********PREPARE DATASET FOR ANALYSIS************
*************************************************

* Load the merged and cleaned endline data
use "$data/endline_clean_merged.dta", clear

*-----------------------------------------------------
* Recode numeric sentinel codes to extended missings
*-----------------------------------------------------

foreach v of varlist _all {
	capture confirm numeric variable `v'
	if _rc != 0 {
		continue // not numeric, nothing to recode
	}
	quietly replace `v' = .w if `v' == 9999 // refused to answer
	quietly replace `v' = .d if inlist(`v', 8888, 888, 88888, 8885) // does not know
	quietly replace `v' = .n if `v' == 7777 // not relevant / other exception
}

* For these items a response code of 5 is set to system missing
global sym_vars = "cit_role1 cit_role2 cit_role3 pubgoods1 aid1 aid3"
foreach v of global sym_vars {
	replace `v' = . if `v' == 5
}

************************************
***Productive capacity correction***
************************************

* Per the original author note, an enumerator misunderstanding of the gov_cap questions was found about 2 weeks into enumeration. The recode below corrects those responses.

forvalues k = 1/3 {
	* Discard answers recorded where capacity_changed is 0
	replace gov_cap`k' = . if capacity_changed == 0
	* Corrected item: code 3 becomes missing, 4 becomes 3, 5 becomes 4, all other values carry over
	generate gov_cap`k'b = cond(gov_cap`k' == 3, ., cond(gov_cap`k' == 4, 3, cond(gov_cap`k' == 5, 4, gov_cap`k')))
	label values gov_cap`k'b sa_a_n_d_sd_cap
}

label variable gov_cap1b "If Gov wants to improve a road outside of Kananga, it will do this quickly and without problems."
label variable gov_cap2b "If Gov wants to provide electricity to all Kananga, it will do this quickly and without problems."
label variable gov_cap3b "If Gov wants to find and apprehend a criminal, it will do this quickly and without problems."

* Only the corrected versions are kept
drop gov_cap1 gov_cap2 gov_cap3

*****************************************************************
***Reverse variables: increasing in demand for good governance***
*****************************************************************

* Items whose scale is flipped in place by the user-written revrs command
global reverse_vars = "others4 bribe_freq pol2 pol3 pol5 pol7 pol8 pol10 pol11 pol13 pol14 know1 know2 know4 bus4 title3 trust8 trust5 trust6 trust7 gov_perform dgrkoc_perform cit_role1 cit_role2 cit_role3 transparent1 transparent2 pubgoods1 aid1 aid3 gov_cap1b gov_cap2b gov_cap3b punish5 punish9"

foreach item of global reverse_vars {
	revrs `item', replace
}

**********************
***Basic covariates***
**********************

* Gender indicator (sex code 2) and squared age
generate female = (sex == 2)

generate age2 = age*age

* Education in years: base years by edu level (1 or 2 -> 1, 3 -> 6, 4 -> 12, otherwise 0)
* plus the within-level years stored in edu2 (forced to 0 for edu levels 0 and 1)
generate edu_yrs = cond(inlist(edu, 1, 2), 1, cond(edu == 3, 6, cond(edu == 4, 12, 0)))
replace edu2 = 0 if inlist(edu, 0, 1)
replace edu_yrs = edu_yrs + edu2
generate edu_yrs_orig = edu_yrs // this copy stays in years, the other one is standardized later
generate no_schooling = (edu < 2)
replace no_schooling = . if edu == .d

* Unemployed: job1 codes 0 or 4
generate unemployed = inlist(job1, 0, 4)

* Chief job: either the coded category or one of the listed free-text answers
generate chef_job = (work_gov3_9 == 1) | inlist(work_gov3_other, "Chef d'avenu", "Chef d'avenue", "Chef davenue", "Chef de localite RVA", "Chef de localité", "Chef de sous cellule", "Chef de sous localite")

* Ethnicity indicators based on the tribe string
generate luluwa = (tribe == "LULUWA")

generate native_speaker = inlist(tribe, "LULUWA", "LUBA", "LUNTU")

* Wealth and income inputs

* Binary house indicator from the walls code: 0 for codes 0-2, 1 for codes 3-4, missing otherwise
generate house1 = 0 if inlist(walls, 0, 1, 2)
replace house1 = 1 if inlist(walls, 3, 4)

* Roof score: roof plus roof2 (system-missing roof2 counts as 0), with roof codes 6 and 7 mapped to 9 and 10
replace roof2 = 0 if roof2 == .
generate roof_sum = roof + roof2
replace roof_sum = 9 if roof == 6
replace roof_sum = 10 if roof == 7

* Count of possessions across the six items (missing only when all six are missing)
egen possessions = rowtotal(possessions_1 possessions_2 possessions_3 possessions_4 possessions_5 possessions_6), missing

generate vehicle = (possessions_1 == 1) | (possessions_2 == 1)

generate any_electricity = elect1

generate inc_dollars = inc_mo / 1000

* Component lists consumed by the index-building loop
global income = "inc_mo inc_wk transport airtime"
global wealth = "floor roof_sum walls fence accessible elect1 possessions"

generate any_renters = (renters > 0)

*Make wealth and income indices
* For each of the two indices: z-score every component named in the global of the
* same name, add the z-scores row-wise (rowtotal, missing only if all components
* are missing), then z-score the resulting sum.
foreach index in wealth income {
foreach var in $`index'{
* Re-run safety: on a first pass the _orig copy does not exist, so the replace fails
* silently and the gen stores the raw values. On later passes the replace restores
* the raw values and the gen fails silently because the _orig copy already exists.
cap replace `var' = `var'_orig
cap g `var'_orig = `var'
sum `var'
replace `var' = (`var'-`r(mean)')/(`r(sd)') //standardize
}
egen `index' = rowtotal($`index'), missing
sum `index'
replace `index' = (`index' -`r(mean)')/(`r(sd)')  //standardize index
}

* NOTE(review): income here is the standardized index built in the loop above, so
* log(income+1) is missing wherever that index is at or below -1. Confirm this is intended.
gen log_inc = log(income+1)

* Wealth is blanked where roof is system missing so that it gets imputed below;
* wealth_imputed flags those observations.
replace wealth = . if roof ==.
g wealth_imputed= roof==.

*Impute missing wealth and house type data (see Online Appendix Section A6.1)
* List every a7 value that has at least one observation with missing wealth.
* a7 is presumably the polygon / neighborhood identifier (other comments in this file call it polygon) - confirm.
levelsof a7 if wealth==. 

* For each such a7 value, fit a regression on observations whose a7 lies within
* plus or minus 1 of it and fill missing values with the linear prediction.
foreach poly in `r(levels)'{
local poly_minus1 = `poly'-1
local poly_plus1 = `poly'+1

* Wealth: first pass uses floor as a predictor; observations still missing afterwards
* (prediction missing because floor is missing) get a second pass without floor.
reg wealth floor log_inc elect1 possessions if a7>=`poly_minus1' & a7 <=`poly_plus1' // extracting betas for the neighborhood
replace wealth = _b[floor]*floor+_b[log_inc]*log_inc+_b[elect1]*elect1+_b[possessions]*possessions +_b[_cons] if a7==`poly' & wealth==.
reg wealth log_inc elect1 possessions if a7>=`poly_minus1' & a7 <=`poly_plus1' // repeat for the 5 HHs where floor info is missing
replace wealth = _b[log_inc]*log_inc+_b[elect1]*elect1+_b[possessions]*possessions +_b[_cons] if a7==`poly' & wealth==.

* House indicator: same two-pass scheme; predictions are rounded to the nearest integer.
* The round is applied to every observation, not only to the imputed ones.
reg house1 floor log_inc elect1 possessions if a7>=`poly_minus1' & a7 <=`poly_plus1' 
replace house1 = _b[floor]*floor+_b[log_inc]*log_inc+_b[elect1]*elect1+_b[possessions]*possessions +_b[_cons] if a7==`poly' & house1==.
replace house1 = round(house1,1)
reg house1 log_inc elect1 possessions if a7>=`poly_minus1' & a7 <=`poly_plus1' 
replace house1 = _b[log_inc]*log_inc+_b[elect1]*elect1+_b[possessions]*possessions +_b[_cons] if a7==`poly' & house1==.
replace house1 = round(house1,1)
}

* Interaction of the program indicator with the house indicator
g programXhouse = house1*program
* Copy of the wealth index that is min-max normalized later in the file
g wealth_n =  wealth  

*****************
***Tax payment***
*****************

*Independent assessments of tax rates
* Enumerator assessment: taxrate codes 1 and 4 map to 2000, code 2 to 6600, code 3 to 20000
g taxrate_enum = 2000 if taxrate==1|taxrate==4
replace taxrate_enum=6600 if taxrate==2
replace taxrate_enum=20000 if taxrate==3
replace taxrate_enum=. if taxrate==.

* Prefer the second inspector assessment when one was actually recorded.
* missing() is used instead of a comparison with . because the DK recode at the top of
* this file creates extended missing values (.d .w .n), and those are not equal to .
* A plain comparison would let a DK second assessment overwrite a valid first one.
replace taxrate_inspector1 = taxrate_inspector2 if !missing(taxrate_inspector2)
replace taxrate_inspector1=20000 if taxrate_inspector1==5555

* Combined assessment: inspector value where available, enumerator value otherwise.
* Again missing() so that extended missing inspector values also fall back to the enumerator.
g taxrate_union = taxrate_inspector1 //Combine rate assessments b/c we are missing the tax ministry inspector assessments for after the violence started
replace taxrate_union = taxrate_enum if missing(taxrate_inspector1)

************
***Bribes***
************

* Self-reported bribe: set when bribe is 1 or arrange is 1
generate bribe_self = (bribe == 1) | (arrange == 1)
* Amount paid is zero for those with paid_self equal to 0; system-missing bribe amounts count as zero
replace paid_amt = 0 if paid_self == 0
replace bribe_amt = 0 if bribe_amt == .

* Broader bribe measure: self reports plus receipt holders who paid above 2000 and at most 6500
* while the combined rate assessment is 6500.
* NOTE(review): the enumerator coding of the middle rate elsewhere in this file is 6600 and the
* original comment here also said 6600. Confirm that the 6500 comparison matches the coding of
* the inspector assessments, otherwise this condition may never be true.
generate bribe_self_reclassify = bribe_self
replace bribe_self_reclassify = 1 if paid_amt > 2000 & paid_amt <= 6500 & paid_receipt_union == 1 & taxrate_union == 6500

*************************************************
***Other interactions with collectors/campaign***
*************************************************

* Self-reported visit indicator; zero visits (tax17) or zero time spent (tax19) overrides it
generate visited = (tax14 == 1)
replace visited = 0 if tax17 == 0 | tax19 == 0

* Number of visits, with implausible entries corrected (20 -> 2, 50 or 52 -> 5)
* and at least one visit imposed for anyone flagged as visited
generate visits = tax17
replace visits = 0 if visited == 0
replace visits = . if visited == .
replace visits = 2 if visits == 20
replace visits = 5 if inlist(visits, 50, 52)
replace visits = 1 if visited == 1 & (visits == 0 | visits == .)

* Registered but holds no payment receipt
generate register_not_pay = (registered == 1 & paid_receipt_union == 0)
replace register_not_pay = 0 if register_not_pay == .

* Total collector time at the house (entry of 3500 corrected to 350), plus its log
generate col_time_spent = tax19
replace col_time_spent = 0 if visited == 0
replace col_time_spent = 350 if tax19 == 3500
generate col_time_spent_log = log(1 + col_time_spent)

* Observed tablet/printer use and number of distinct collectors; both zero when not visited
generate observed_tablet = tax20
generate num_cols = tax21
foreach v in observed_tablet num_cols {
	replace `v' = 0 if visited == 0
}

*Knowledge of property tax and campaign

* Knows the property tax. The DK recode at the top of this file has already turned 8888
* into .d, so the do-not-know case is tested as .d here (the result is 0 either way).
g know_property_tax=tax1==1
replace know_property_tax=0 if tax1==.d

* Knows the tax rate. Missing answers must not count as knowledge: in Stata a missing value
* compares as larger than any number and two system missings compare as equal, so each
* condition that could match on a missing tax3 is guarded with !missing().
g know_taxrate = tax3==taxrate_union & !missing(tax3)
replace know_taxrate = 1 if tax3==0 & taxrate==4 //exempted
replace know_taxrate = 1 if taxrate_union==20000 & tax3>10000 & !missing(tax3) //measured

g know_campaign = tax7==1

g know_dgrkoc = tax11==1|tax11==2

g know_receipt = tax4==3

* Component list for the tax knowledge index built later in the file
global know_taxes_index = "know_property_tax know_taxrate know_campaign know_dgrkoc know_receipt"

*******************
***Participation***
*******************

* Took part in at least one of townhall and evaluation
generate townhall_or_eval = (townhall == 1) | (evaluation == 1)

* Took part in both
generate townhall_and_eval = (townhall == 1) & (evaluation == 1)

* Row total of the two participation variables (missing only when both are missing)
egen participation_index = rowtotal(townhall evaluation), missing

* Evaluation box code 2 flags a disapproving evaluation; undefined when evaluation is system missing
generate eval_box_disapprove = (eval4_box == 2)
replace eval_box_disapprove = . if evaluation == .

* Any non-empty evaluation comment
generate eval_comment = (eval_comment_en != "")

***********************************
***Calculate participation costs***
***********************************

*Assume independent trips to townhall and drop box
* Transport cost: one trip for participants in either activity, two trips for those who
* did both, zero for non-participants.
g cost_participation = transport_amount if townhall_or_eval==1
replace cost_participation = transport_amount*2 if townhall_and_eval==1
replace cost_participation = 0 if townhall_or_eval==0

*Transform as share of daily income

* Daily income from the raw (pre-standardization) weekly figure, falling back to the raw
* monthly figure where the weekly one is zero or system missing.
g inc_day = inc_wk_orig/7 //weekly measure
replace inc_day = inc_mo_orig/30 if inc_day==0 |inc_day==. //combine with monthly measure
*winsorize to reduce impact of outliers
* winsor is a user-written command; see its help file for the exact meaning of high and p(.05)
winsor inc_day, gen(inc_day_w) high p(.05) 
*calculate polygon average
bysort a7: egen inc_day_poly_mean = mean(inc_day_w) 

*Calculate participation cost as a share of average income
g cost_participation_rel_w = cost_participation/inc_day_poly_mean

sum cost_participation_rel_w if townhall_or_eval==1
* This local is not read again within this section of the file
local avg_cost_perc_participators = round(`r(mean)', .001)

*Now factoring in opportunity costs of time spent in 3 hour meeting plus 1 hour for evaluation form

*Assume 10 hours of work per day to achieve daily income
* Missing winsorized incomes are filled with the a7 mean only after that mean was computed above
replace inc_day_w = inc_day_poly_mean if inc_day_w==. //8 missing
g inc_hour = inc_day_w/10
* Later replaces override earlier ones: participants in both activities end up with 4 hours
g cost_participation2 = cost_participation
replace cost_participation2 = cost_participation + 3*inc_hour if townhall==1 // average townhall meeting 3 hours 
replace cost_participation2 = cost_participation + 1*inc_hour if evaluation==1 & (townhall==0|townhall==.) //assume 1 hour to fill in evaluation, go to drop box, find it, etc
replace cost_participation2 = cost_participation + 4*inc_hour if townhall_and_eval==1

g cost_participation2_rel_w = cost_participation2/inc_day_poly_mean

*Account for taxi sharing: 6.7\% in control and 13\%
* The multipliers applied are .96 where program is 0 and .94 where program is 1.
* NOTE(review): the original comment above does not say what the second percentage refers to
* or how the percentages translate into these multipliers - confirm.
g cost_participation_adjusted  = cost_participation * .96 if program==0
replace cost_participation_adjusted  = cost_participation * .94 if program==1

g cost_participation_adj_rel = cost_participation_adjusted/inc_day_poly_mean
sum cost_participation_adj_rel if townhall_or_eval==1

* Same opportunity-cost additions as above, applied to the taxi-sharing adjusted transport cost
g cost_participation2_adjusted  = cost_participation_adjusted
replace cost_participation2_adjusted = cost_participation_adjusted + 3*inc_hour if townhall==1 
replace cost_participation2_adjusted = cost_participation_adjusted + 1*inc_hour if evaluation==1 & (townhall==0|townhall==.) 
replace cost_participation2_adjusted = cost_participation_adjusted + 4*inc_hour if townhall_and_eval==1

g cost_participation2_adj_rel = cost_participation2_adjusted/inc_day_poly_mean
sum cost_participation2_adj_rel if townhall_or_eval==1

* Remove intermediate variables; only the cost levels and the relative measures are kept
drop inc_day_w inc_day_poly_mean inc_hour inc_day cost_participation_adjusted cost_participation2_adjusted

****************************
***National participation***
****************************

*Party
* Built from pol6 and pol7. Statement order matters here: the third line overrides the second
* for pol6 equal to 0, and the last line overrides everything where pol7 is .w (refused).
* Note that pol7 was reverse-coded earlier in this file and pol6 was not.
g party = pol6+3
replace party =0 if pol6==0
replace party = pol7-1 if pol6==0|pol6==1
replace party = 0 if pol7==.w

*Protest
* Starts from pol11 and is set to 1 wherever pol10 equals 1
g protest = pol11
replace protest=1 if pol10==1

*Rally
* Starts from pol14 and is set to 1 wherever pol13 equals 1
g rally = pol14
replace rally=1 if pol13==1

*March
* Indicator for pol8 equal to 2 (evaluated after pol8 was reverse-coded earlier in this file)
g march = pol8==2

* Component list for the national participation index built later in the file
global national_participation = "pol5 march party protest rally"

**************************
***Interest in politics***
**************************

* Information choice: indicator for option 1 in each of the three information tests
forvalues i = 1/3 {
	generate infotest`i'_gov = (infotest`i' == 1)
}

* Component list for the interest in politics index
global interest_politics = "pol2 pol3 know1 know2 know5 infotest1_gov infotest2_gov infotest3_gov"

*-------------------------------
* Role of citizen in politics
*-------------------------------

* Component list for the citizen role index
global citizen_role_politics = "cit_role1 cit_role2 cit_role3"

********************************
****** Views of government******
********************************

*Responsibility to provide public goods
* One indicator per sector: 1 when the answer is code 2 (provincial government),
* missing when the answer is system missing, .d or .w
forvalues x = 1(1)7{
g provide`x'_gov = provide`x'==2 //Prov. Gov.
replace provide`x'_gov=. if provide`x'==.|provide`x'==.d|provide`x'==.w
label var provide`x'_gov "Provincial government should provide - sector `x'"
}

global gov_provide = "provide1_gov provide2_gov provide3_gov provide4_gov provide5_gov provide6_gov provide7_gov pubgoods1 aid1 aid3"
global gov_provide_sectors = "provide1_gov provide2_gov provide3_gov provide4_gov provide5_gov provide6_gov provide7_gov"
global gov_provide_aid = "pubgoods1 aid1 aid3"

*Current level of provision of public goods by provincial government
global level_provide = "level1 level2 level3 level4 level5 level6 level7"

*Perceived integrity of government
* The DK codes are cleaned BEFORE the _orig copies are taken below. Previously the copy of
* funds_spent was made first, so funds_spent_orig (later renamed and standardized for the
* repeat-baseline sample) still carried the 999 and 88 sentinel codes.
replace funds_deposited=. if funds_deposited==999 //fix DKs
replace funds_spent=. if funds_spent==999|funds_spent==88 //fix DKs

*Make alternative versions because these indices will be standardized
foreach var in $gov_provide_sectors $level_provide trust5 trust6 funds_spent gov_perform dgrkoc_perform pol6 pol10 {
	g `var'_orig = `var'
}

*Performance of tax ministry and government
global eval_trust_gov = "gov_perform trust5"
global eval_trust_tax = "dgrkoc_perform trust6"

*Perceived transparency of government
global transparency = "transparent1 transparent2"

***********************************
***Perceived Government Capacity***
***********************************

* Component lists for the three capacity indices: information, public goods provision, punishment
global capacity_info ="info1 info2 info4 info5"

global capacity_provide = "gov_cap1b gov_cap2b gov_cap3b"

global capacity_punish ="punish5 punish9"

* Perceived compliance: the five others4 codes 1 to 5 mapped onto 0, .25, .5, .75, 1;
* any other value of others4 leaves the result missing
generate perceived_compliance = (others4 - 1) / 4 if inlist(others4, 1, 2, 3, 4, 5)

********************
***Other analysis***
********************

* Component lists for the chief indices
global engage_chiefs = "consult_chef salongo know4"
global view_chiefs = "chef_respond trust8"

* Component list for the formalization index
global formalization = "service1 service2 bus4 title3"

* Experimenter effects: no usable phone number in either field, and a failed phone check
generate no_phone = (phone1 == . | phone1 == 999999999) & (phone2 == . | phone2 == 999999999)
generate bad_phone = (phonecheck == 0) | (phonecheck2 == 0)

* Quantile groups of time_lag with 5, 10, 15 and 20 bins
foreach q of numlist 5 10 15 20 {
	xtile time_xtile`q' = time_lag, nq(`q')
}

* Received the informational flier while in the control group
generate control_flier_receiver = (program == 0) & (flier == 1)

***********************
***Construct indices***
***********************

* Names of the indices to build; each name is also the global holding its component list
global indices_to_make = "eval_trust_gov eval_trust_tax know_taxes_index national_participation interest_politics citizen_role_politics engage_chiefs view_chiefs gov_provide gov_provide_sectors gov_provide_aid level_provide capacity_provide capacity_punish capacity_info formalization transparency"

* Per index: z-score each component in place, add the z-scores row-wise, then z-score the total
foreach idx of global indices_to_make {
	foreach item of global `idx' {
		summarize `item'
		replace `item' = (`item'-`r(mean)')/(`r(sd)')
	}
	egen `idx' = rowtotal($`idx'), missing
	summarize `idx'
	replace `idx' = (`idx' -`r(mean)')/(`r(sd)')
}

* Stand-alone variables that are z-scored in place
global vars_to_standardize = "participation_index others4 trust7 knows_employer old_respondent funds_deposited  funds_spent"

foreach item of global vars_to_standardize {
	summarize `item'
	replace `item' = (`item'-`r(mean)')/(`r(sd)')
}

* Variables rescaled to the unit interval with min-max normalization
global vars_to_normalize = "wealth_n"

foreach item of varlist $vars_to_normalize {
	summarize `item'
	replace `item' = (`item'-`r(min)')/(`r(max)'-`r(min)')
}

***********************
***Label variables*****
***********************

* Variable labels. Statements run top to bottom, so when a variable is labelled twice
* the later label is the one that remains.
label var double_bonus "Double collector bonus"
label var paid_jive_u_avg "Payment propensity (JIVE)"
label var registered_jive_u_avg "Registration propensity (JIVE)"
* house1 is labelled again further down; that later label replaces this one
label var house1 "House quality"
label var female Female
label var age2 "Age squared"
label var born_kga "Born in Kananga"
label var edu_yrs "Years of education (standardized)"
label var edu_yrs_orig "Years of education"
label var no_schooling "No formal education"
label var chef_job "Job as local city chief"
label var luluwa "Majority ethnicity"
label var native_speaker "Tshiluba mother tongue"
* Wealth components: the plain names hold standardized values, the _orig names hold raw values
label var roof_sum "Index of roof quality (standardized)"
label var possessions "Index of possessions (standardized)"
label var possessions_orig "Index of possessions"
label var vehicle "Owns vehicle"
label var any_renters "Any renters in compound"
label var floor_orig "Quality of floor"
label var roof_sum_orig "Index of roof quality"
label var walls_orig "Quality of walls"
label var fence_orig "Quality of fence"
label var accessible "Accessibility of compound"
label var accessible_orig "Accessibility of compound"
label var elect1_orig "Any source of electricity"
label var wealth "Wealth index (standardized)"
* Income components and index
label var inc_mo_orig "Monthly income"
label var inc_wk_orig "Weekly income"
label var transport_orig "Weekly expenditure on transportation"
label var airtime_orig "Weekly expenditure on phone credit"
* NOTE(review): income is the standardized index of four components and log_inc is the log
* of that index plus one; confirm the two labels below describe them as intended
label var income "Weekly income"
label var log_inc "Log weekly income"
label var wealth_imputed "Imputed wealth index"
label var programXhouse "Program X House quality"
label var wealth_n "Wealth index (normalized)"
* Tax, bribes and collector contact
label var taxrate_enum "Assessment of tax rate - enumerator"
label var taxrate_union "Independent assessment of tax rate"
label var bribe_self "Paid bribe (self reported)"
label var register_not_pay "Registered but non-compliant with tax"
label var col_time_spent "Collector time spent at house in total"
label var know_property_tax "Knows property tax"
label var know_taxrate "Knows property tax rate"
label var know_campaign "Knows property tax campaign of 2016"
label var know_dgrkoc "Knows provincial tax ministry"
label var know_receipt "Knows correct type of receipt"
* Participation and its costs
label var eval_comment "Evaluation comment"
label var cost_participation "Cost of participation (transport only)"
label var cost_participation2 "Cost of participation (transport + opp cost)"
label var cost_participation_rel_w "Cost of participation (transport only) as share of daily income"
label var cost_participation2_rel_w "Cost of participation (transport + opp) as share of daily income"
label var cost_participation_adj_rel "Cost of participation (transport only) as share of daily income adjusted for taxi sharing"
label var cost_participation2_adj_rel "Cost of participation (transport + opp) as share of daily income adjusted for taxi sharing"
* National politics
label var party "Member of political party"
label var protest "Engaged in political protests"
label var rally "Engaged in political rallies"
label var march "Engaged in political marches"
label var infotest1_gov "Demand for political information 1"
label var infotest2_gov "Demand for political information 2"
label var infotest3_gov "Demand for political information 3"
label var unemployed "Unemployed"
label var inc_dollars "Household monthly income (USD)"
* Final label for house1 (replaces the one assigned near the top of this list)
label var house1 "Lives in non-mudbrick house"
label var possessions_1 "Owns motorbike"
label var possessions_2 "Owns car or truck"
label var work_gov "Works for government"
label var any_electricity "Has any source of electricity"
label var bribe_self_reclassify "Paid bribe"
label var bribe_amt "Bribe amount"
label var bribe_freq "Others bribe"
label var bribe_amt1 "Going rate"
label var visited "Visited by tax collector" //self report
label var visits "Number of collector visits"
label var col_time_spent_log "Collector time spent (log)"
label var observed_tablet "Observed tablet/printer"
label var num_cols "Number of collectors"
label var townhall_or_eval "Townhall or evaluation"
label var townhall_and_eval "Townhall and evaluation"
label var participation_index "Index (townhall and evaluation)"
label var evaluation "Evaluation form submission"
* Indices and stand-alone outcomes
label var gov_provide "Resp. for public goods provision"
label var capacity_punish "Ability to punish tax evaders"
label var capacity_provide "Ability to provide public goods"
label var capacity_info "Information about taxpayers"
label var others4 "Perceived citizen compliance"
label var funds_deposited "Integrity of tax collectors"
label var trust5_orig "Trust in provincial government"
label var trust6_orig "Trust in tax ministry"
label var trust7 "Trust in Foreign Researchers"
label var knows_employer "Remembers Researcher"
label var old_respondent "Past Participant"
label var eval_box_disapprove "Critical eval"
* Labels for the unstandardized (_orig) copies and for the constructed indices.
* The dollar sign in the first label is escaped with a backslash: inside double quotes an
* unescaped dollar sign followed by name characters is read as a global macro reference
* and would be substituted away, dropping the amount from the label.
label var funds_spent_orig "Amount of \$1000 spent well"
label var gov_perform_orig "Performance of provincial government"
label var dgrkoc_perform_orig "Performance of tax ministry"
label var pol6_orig "Member of political party"
label var pol10_orig "Ever participated in protest"
label var perceived_compliance "Perceived property tax compliance"
label var no_phone "Has no mobile phone"
label var bad_phone "Gave fake phone number"
* Spelling corrected in the next label (recipient)
label var control_flier_receiver "Flier recipient in control"
label var eval_trust_gov "Performance of government"
label var eval_trust_tax "Performance of tax ministry"
* Spelling corrected in the next label (Knowledge)
label var know_taxes_index "Knowledge of property tax"
label var national_participation "Engagement with national politics"
label var interest_politics "Interest in politics"
label var citizen_role_politics "Citizen role in politics"
label var engage_chiefs "Engagement with city chiefs"
label var view_chiefs "Views of city chiefs"
label var gov_provide_sectors "Resp. for public goods provision (sector-based)"
label var gov_provide_aid "Resp. for public goods provision (hypotheticals)"
label var level_provide "Current public goods provision"
label var formalization "Demand for formalization"
label var transparency "Transparency of government"

* Sector-level labels for the unstandardized copies, sectors 1 to 7
forv x=1/7{
	label var level`x'_orig "Perceived provision by provincial gov in sector `x'"
}

forv x=1/7{
	label var provide`x'_gov_orig "Provincial government should provide - sector `x'"
}

*****************************
** Additional value labels **
*****************************

* Value label for the five-point scale attached to bribe_freq, defined in a single statement
label define others_bribe 1 "No people" 2 "A few people" 3 "Some people" 4 "Most people" 5 "All people", modify
label values bribe_freq others_bribe

**************************
** Separate two samples **
**************************

*Repeat baseline sample (no costly participation outcomes - for compliance paper i.e. Weigel 2018)
* Everything between preserve and restore works on a temporary copy restricted to sample2:
* baseline variables are merged in, baseline and endline versions of each outcome are built
* with suffixes 1 and 2, and the result is reshaped to a two-period panel and saved.
preserve

keep if sample2==1

* Keep matched respondents only
merge 1:1 s3_code using "$data/baseline_variables_sample2.dta", keep(3) nogen

*Create analogous baseline and endline variables for these respondents
revrs f14_s1 pol6_s1, replace

g trust_gov1=f14_s1
g trust_gov2=trust5_orig

rename gov2_s1 gov_integrity1
rename funds_spent_orig gov_integrity2

rename pol6_s1 eval_gov1
rename gov_perform_orig eval_gov2

* Baseline provision indicators: 1 when the baseline answer is code 2, missing when the
* answer is system missing, .d or .w. An unused loop counter was removed from this loop.
foreach x in 78 79 80 81 83 84{
gen provide`x'_gov_s1 = e`x'_s1==2
replace provide`x'_gov_s1=.  if e`x'_s1==. |e`x'_s1==.d|e`x'_s1==.w
}

global gov_provide1 = "provide78_gov_s1 provide79_gov_s1 provide80_gov_s1 provide81_gov_s1 provide83_gov_s1 provide84_gov_s1"
global gov_provide2 = "provide2_gov_orig provide3_gov_orig provide4_gov_orig provide5_gov_orig provide6_gov_orig provide7_gov_orig"

* These two globals are overwritten here and keep the new values after restore
global indices_to_make = "gov_provide1 gov_provide2"

* Same index recipe as in the main sample: z-score components, sum row-wise, z-score the sum
foreach index in $indices_to_make{
foreach var in $`index'{
sum `var'
replace `var' = (`var'-`r(mean)')/(`r(sd)')
}
egen `index' = rowtotal($`index'), missing
sum `index'
replace `index' = (`index' -`r(mean)')/(`r(sd)')
}

global vars_to_standardize = "gov_integrity1 gov_integrity2 trust_gov1 trust_gov2 eval_gov1 eval_gov2"

foreach var in $vars_to_standardize {
sum `var'
replace  `var' = ( `var'-`r(mean)')/(`r(sd)')
}

keep $indices_to_make $vars_to_standardize program a7 s3_code stratum

*Create panel
* NOTE(review): after the keep above only the gov_provide, gov_integrity, trust_gov and
* eval_gov stubs have matching variables; the remaining stubs (trust_both trust_tax eval_tax
* party protest) match nothing. Confirm how reshape treats them and whether they are needed.
reshape long gov_provide trust_both trust_tax trust_gov eval_gov eval_tax party protest  gov_integrity, i(s3_code) j(post)

gen endline = post==2

label var endline "Endline"

save "$data/endline_clean_merged_sample2.dta", replace

restore

*Main dataset contains the new endline sample only
drop if sample2==1

* Overwrites the input file that was loaded at the top of this script
save "$data/endline_clean_merged.dta", replace


